# Remove all visible (non-dot-prefixed) objects from the current environment
# before the analysis starts.
# NOTE(review): when the script is source()d this also clears the caller's
# workspace; running it in a fresh R session would make this line unnecessary.
rm(list = ls())
library(data.table)
library(plyr)
library(tidyr)
library(lfe)
library(stargazer)
library(xtable)
library(sandwich)
library(roll)
library(readxl)
library(readr)
library(zoo)
library(texreg)
library(DescTools)
library(ggplot2)

# Survey master file: the first row of the sheet is skipped and up to 10,000
# rows are used for column-type guessing.
m <- data.table(
  read_xlsx(path = "../Data/MasterData.xlsx", skip = 1, guess_max = 10000)
)
# age = 2021 - year (presumably `year` is the birth year -- confirm with the
# codebook).
m[, `:=`(age = 2021 - year)]

## Build the month-end SH index series (`sh`) from the daily index file.
## Result: one row per calendar month with columns `ym` (yearmon) and
## `SHindex` (numeric), later merged into the recalled-episode counts.
sh <- data.table(read_xlsx("../Data/SHIndex.xlsx", guess_max = 1e4))
# Keep and rename the two needed columns in one step; renaming inside `j`
# avoids `names<-`, which copies a data.table.
sh <- sh[, .(date = Idxtrd01, SHindex = Idxtrd05)]
# Drop the first data row.
# NOTE(review): presumably a label/description row below the header -- confirm
# against the workbook.
sh <- sh[-1]
sh[, date := as.Date(date)]
setorder(sh, date)

# Collapse to monthly frequency: rows are sorted by date, so SHindex[.N] is
# the last available observation within each month.
sh[, ym := as.yearmon(date)]
sh <- sh[, .(SHindex = SHindex[.N]), by = .(ym)]
sh[, SHindex := as.numeric(SHindex)]

# Parse the begin/end of each recalled period into zoo yearmon values using
# the "%Ym%m" pattern (four-digit year, a literal "m", then the month number).
m[, `:=`(
  memrableperiod_begin = as.yearmon(memrableperiod_begin, "%Ym%m"),
  memrableperiod_end = as.yearmon(memrableperiod_end, "%Ym%m")
)]

###########################################################################
# Distribution of recalled episodes
###########################################################################

## Figure 3a (start month): number of type == 0 responses by the month in
## which the recalled period begins, drawn as bars with the month-end SH index
## as a blue line.
# Counts are multiplied by 10 before plotting -- presumably so that the bars
# are visible on the same y-axis as the index level.
tmp <- table(m[type == 0]$memrableperiod_begin) * 10
C <- data.table(tmp)
# setnames() renames by reference; `names<-` would copy the data.table.
setnames(C, c("ym", "count"))
# table() turned the yearmon values into character labels; convert them back.
C[, ym := as.yearmon(ym)]
setorder(C, ym)
# all.y = TRUE keeps every month of the index series; months without any
# response get an NA count.
C <- merge(C, sh, by = "ym", all.y = TRUE)

# Printed as a check that the tallest bar fits under the fixed y-limit below.
max(C$count, na.rm = TRUE)
ggplot(C) +
  xlab("") +
  ylab("") +
  geom_bar(aes(x = ym, y = count), stat = "identity") +
  geom_line(aes(x = ym, y = SHindex), color = "blue") +
  theme_minimal() +
  ylim(c(0, 8000)) +
  # yearmon is numeric (year + fraction of year), so the limits are in years.
  scale_x_continuous(expand = c(0, 0), limits = c(1992, 2022))
ggsave("../Figures/pic-datestart.pdf", width = 6, height = 4)

## Figure 3a (end month): number of type == 0 responses by the month in which
## the recalled period ends, drawn as bars with the month-end SH index as a
## blue line.
# Counts are multiplied by 10 before plotting -- presumably so that the bars
# are visible on the same y-axis as the index level.
tmp <- table(m[type == 0]$memrableperiod_end) * 10
C <- data.table(tmp)
# setnames() renames by reference; `names<-` would copy the data.table.
setnames(C, c("ym", "count"))
# table() turned the yearmon values into character labels; convert them back.
C[, ym := as.yearmon(ym)]
setorder(C, ym)
# all.y = TRUE keeps every month of the index series; months without any
# response get an NA count.
C <- merge(C, sh, by = "ym", all.y = TRUE)

# Printed as a check that the tallest bar fits under the fixed y-limit below.
max(C$count, na.rm = TRUE)
ggplot(C) +
  xlab("") +
  ylab("") +
  geom_bar(aes(x = ym, y = count), stat = "identity") +
  geom_line(aes(x = ym, y = SHindex), color = "blue") +
  theme_minimal() +
  ylim(c(0, 8000)) +
  # yearmon is numeric (year + fraction of year), so the limits are in years.
  scale_x_continuous(expand = c(0, 0), limits = c(1992, 2022))
ggsave("../Figures/pic-dateend.pdf", width = 6, height = 4)

## Figure 3b: more experienced subsample (experience >= 13)
m_exp <- m[experience >= 13]

## Start month of the recalled period for type == 0 responses in the
## subsample, drawn as bars with the month-end SH index as a blue line.
# Counts are multiplied by 40 here (10 in the full sample) -- presumably to
# keep the bars of the smaller subsample visible on the index-level axis.
tmp <- table(m_exp[type == 0]$memrableperiod_begin) * 40
C <- data.table(tmp)
# setnames() renames by reference; `names<-` would copy the data.table.
setnames(C, c("ym", "count"))
# table() turned the yearmon values into character labels; convert them back.
C[, ym := as.yearmon(ym)]
setorder(C, ym)
# all.y = TRUE keeps every month of the index series; months without any
# response get an NA count.
C <- merge(C, sh, by = "ym", all.y = TRUE)

# Printed as a check that the tallest bar fits under the fixed y-limit below.
max(C$count, na.rm = TRUE)
ggplot(C) +
  xlab("") +
  ylab("") +
  geom_bar(aes(x = ym, y = count), stat = "identity") +
  geom_line(aes(x = ym, y = SHindex), color = "blue") +
  theme_minimal() +
  ylim(c(0, 8000)) +
  # yearmon is numeric (year + fraction of year), so the limits are in years.
  scale_x_continuous(expand = c(0, 0), limits = c(1992, 2022))
ggsave("../Figures/pic-datestart_exp.pdf", width = 6, height = 4)

## Figure 3b (end month): number of type == 0 responses in the experienced
## subsample `m_exp` by the month in which the recalled period ends, drawn as
## bars with the month-end SH index as a blue line.
# Counts are multiplied by 40 before plotting -- presumably so that the bars
# are visible on the same y-axis as the index level.
tmp <- table(m_exp[type == 0]$memrableperiod_end) * 40
C <- data.table(tmp)
# setnames() renames by reference; `names<-` would copy the data.table.
setnames(C, c("ym", "count"))
# table() turned the yearmon values into character labels; convert them back.
C[, ym := as.yearmon(ym)]
setorder(C, ym)
# all.y = TRUE keeps every month of the index series; months without any
# response get an NA count.
C <- merge(C, sh, by = "ym", all.y = TRUE)

# Printed as a check that the tallest bar fits under the fixed y-limit below.
max(C$count, na.rm = TRUE)
ggplot(C) +
  xlab("") +
  ylab("") +
  geom_bar(aes(x = ym, y = count), stat = "identity") +
  geom_line(aes(x = ym, y = SHindex), color = "blue") +
  theme_minimal() +
  ylim(c(0, 8000)) +
  # yearmon is numeric (year + fraction of year), so the limits are in years.
  scale_x_continuous(expand = c(0, 0), limits = c(1992, 2022))
ggsave("../Figures/pic-dateend_exp.pdf", width = 6, height = 4)

# Figure 5: Distribution of daily returns during the survey period
# Reads the daily index returns, keeps observations from 2021-11-29 onwards
# and draws one bar per trading day.
index <- data.table(read.csv("../Data/IndexRet.csv"))
# Dates are stored like "29nov2021" (day, abbreviated month name, year).
index[, date := as.Date(date, format = "%d%b%Y")]
# Compare against an explicit Date rather than relying on implicit coercion
# of the character literal.
index <- index[date >= as.Date("2021-11-29")]

ggplot(index, aes(x = date, y = ret)) +
  geom_bar(stat = "identity", color = "black", fill = "white") +
  xlab("date") +
  ylab("") +
  scale_x_date(date_labels = "%Y-%m-%d") +
  # theme_minimal() is a complete theme: adding it after theme() would replace
  # the custom plot margin, so the margin tweak has to come last.
  theme_minimal() +
  theme(plot.margin = margin(.5, 1, 0, 0, "cm"))
ggsave("../Figures/pic-marketret_daily.pdf", width = 6, height = 4)
